Loading packages

# JSON libraries
library(rjson)
library(RJSONIO)
library(jsonlite)

# To read the netCDF file (check for better options!)
library(ncdf4)

# To get the min bounding box
library(sp)
library(shotGroups)

# To plot the track of the glider
library(spacetime)
library(trajectories)
library(leaflet)

# To format dates
library(lubridate)

# ?? Not sure why I was using this
library(tidyverse)

# To format text in the console (does not format text in R markdown)
require(crayon)

1. Formatting time and getting coordinates

Writing a function to get the Time dimension from the glider netCDF file

The following function gets the time dimension from the glider netCDF file and converts it from a double to a Unix-time (POSIXct) object. The function also extracts the time stamp and the date. It returns a dataframe that has the formatted time in addition to the time stamp and the date. This function takes an ncdf4 object as an argument.

# Format the TIME dimension of an opened glider netCDF file.
#
# Arguments:
#   file  An object whose `dim$TIME$vals` element holds the numeric time
#         values (for example the object returned by ncdf4::nc_open()).
#
# Returns:
#   On success, a data frame with the columns `time_formatted` (POSIXct),
#   `timeStamp` ("%H:%M:%S" text) and `date` (Date). All three are expressed
#   in UTC so that the clock time and the calendar date always describe the
#   same instant. If an error or a warning is raised, a message is emitted,
#   the condition is printed and the condition object is returned instead.
formatTimeDim <- function(file){
    tryCatch(
        expr = {
            # Getting the time dimension
            time_dim <- file$dim$TIME
            if (is.null(time_dim)) {
                stop("the file has no TIME dimension", call. = FALSE)
            }
            # NOTE(review): assumes the values are seconds since 1970-01-01;
            # confirm against the `units` attribute of the TIME dimension.
            # The time zone is pinned to UTC: without it, format() would use
            # the session's local zone while as.Date() defaults to UTC, so
            # `timeStamp` and `date` could refer to different days.
            time_formatted <- as.POSIXct(time_dim$vals, origin = "1970-01-01", tz = "UTC")
            # Extracting the time stamp from the date-time object
            timeStamp <- format(time_formatted, "%H:%M:%S")
            # Extracting the date from the date-time object
            date <- as.Date(time_formatted, tz = "UTC")
            # Combining the formatted time, the time stamp and the date
            all <- data.frame(
                time_formatted = time_formatted,
                timeStamp = timeStamp,
                date = date,
                check.names = FALSE
            )

            message('The time dimension has been successfully formatted!')
            all
        },
        error = function(e){
            message('Caught an error while formatting time!')
            # print() returns its argument, so the condition object is the
            # value handed back to the caller.
            print(e)
        },
        warning = function(w){
            message('Caught an warning while formatting time!')
            print(w)
        }
    )
}

Combining lat and long with time

This function should return a dataframe that has the lat, lon, the time formatted, time stamp and date with no NA data.

# Read latitude, longitude and time from a glider netCDF file.
#
# Arguments:
#   filePath  Path of the netCDF file to open.
#
# Returns:
#   On success, a data frame with the columns `lat`, `lon` and the columns
#   produced by formatTimeDim(), with every row containing an NA removed.
#   If an error or a warning is raised, a message is emitted, the condition
#   is printed and the condition object is returned instead.
#
# The file handle is closed in every case (success, error or warning).
combineLatLongWithTime <- function(filePath){
    # Declared up front so the `finally` clause can tell whether the file
    # was actually opened.
    nc <- NULL
    tryCatch(
        expr = {
            # Reading the file
            nc <- nc_open(filePath)
            # Getting the latitude and longitude
            lon <- ncvar_get(nc, "LONGITUDE")
            lat <- ncvar_get(nc, "LATITUDE")
            # Calling the time function to get the formatted time columns
            time <- formatTimeDim(nc)
            # Combining the time data frame with lat and long (may hold NAs)
            dataframe <- cbind.data.frame(lat, lon, time)

            cat(bold("The number of NA this file has is: ", sum(is.na(dataframe))))
            cat("\n")

            # Removing rows that contain NA values
            dataframe_No_NA <- dataframe %>% drop_na()

            # Emitted before the result is produced; placed after return()
            # it could never run.
            message("dataframe successfully created!")
            dataframe_No_NA
        },
        error = function(e){
            message('Caught an error!')
            print(e)
        },
        warning = function(w){
            message('Caught an warning!')
            print(w)
        },
        finally = {
            # Release the netCDF handle even when a handler fired.
            if (!is.null(nc)) {
                nc_close(nc)
            }
        }
    )
}

2. Getting the mission track

A function to get the track of the netCDF file

This function takes a dataframe that has lat, lon, time_formatted, time stamp and date as an argument. This is the output of the function combineLatLongWithTime. I am not sure how to select the first two columns of the dataframe that has lat, lon, time… Is it better if I put lat and lon in a separate dataframe? It looks to me that lat_lon_time_No_NA %>% select(1:2) makes things more complicated!

# Build a Track object from the glider positions and times.
#
# Arguments:
#   lat_lon_time_No_NA  Data frame with the columns `lat`, `lon` and
#                       `time_formatted` (the output of
#                       combineLatLongWithTime()).
#
# Returns:
#   The object produced by Track() for the given points. Its attribute
#   table holds `lat` in column 1 and `lon` in column 2.
getMissionTrack <- function(lat_lon_time_No_NA){
  # Setting the reference system
  crs <- CRS("+proj=longlat +datum=WGS84")
  # Spatial coordinates are given as x = longitude, y = latitude; passing
  # the columns in (lat, lon) order would swap the two axes.
  coords <- lat_lon_time_No_NA[, c("lon", "lat")]
  # Creating a spatial points object
  sp <- SpatialPoints(coords, proj4string = crs)
  # Getting time
  time <- lat_lon_time_No_NA$time_formatted
  # Attribute table: kept in (lat, lon) order because plotMissionTrack()
  # reads column 1 as latitude and column 2 as longitude.
  data <- data.frame(lat_lon_time_No_NA[, c("lat", "lon")])
  # Creating an STIDF object
  stidf <- STIDF(sp, time, data)
  # Creating and returning the track object
  Track(stidf)
}

A function to get the first value of each day in the dataframe

This function also takes a dataframe that has lat, lon, time_formatted, timestamp and date as an input. This is the output of the function combineLatLongWithTime.

# Pick the first record of every calendar day.
#
# Arguments:
#   lat_lon_time_No_NA  Data frame with a `date` column (the output of
#                       combineLatLongWithTime()).
#
# Returns:
#   A data frame holding, for each distinct value of `date`, the first row
#   found for that date.
getMissionTrackLabels <- function(lat_lon_time_No_NA){
  # One data frame per distinct date
  rows_by_day <- split(lat_lon_time_No_NA, lat_lon_time_No_NA$date)
  # Keep only the leading row of each per-day data frame
  first_row_of_day <- lapply(rows_by_day, function(day_rows) day_rows[1,])
  # Stack the single rows back into one data frame
  do.call(rbind, first_row_of_day)
}

Generalizing the mission track

This function should generalize the mission track. I still need to decide what method I should use for generalization. (distance or time) I also need to think what kind of structure I need to put the generalized track in. This function is not complete yet.

# Generalize (simplify) a mission track.
#
# Arguments:
#   missionTrack  The track object to generalize (the output of
#                 getMissionTrack()).
#   ...           Further arguments passed on unchanged to generalize(),
#                 e.g. `distance = 100` or `timeInterval = 60`.
#
# Returns:
#   Whatever generalize() returns for the given track and arguments.
#
# NOTE(review): generalize() appears to need one of `timeInterval`,
# `distance` or `n` to choose a method; without the pass-through the
# wrapper could never supply one. Confirm against the trajectories docs.
generalizeMissionTrack <- function(missionTrack, ...){
  generalize(missionTrack, ...)
}

3. Plotting Tracks

A function to plot the track of the netCDF file

This function takes the output of the function getMissionTrack and plots it on a leaflet map in R. This function is only meant to explore how the tracks are plotted and therefore it is not part of the metadata that needs to be transported.

# Draw a mission track as a polyline on a leaflet map.
#
# Arguments:
#   gliderTrack  Track object whose `data` slot holds the latitude in
#                column 1 and the longitude in column 2 (the output of
#                getMissionTrack()).
#
# Returns:
#   The leaflet map with the tile layer and the polyline added.
plotMissionTrack <- function(gliderTrack){
  # Coordinates stored alongside the track
  track_points <- gliderTrack@data
  # Empty map with the default tile layer
  base_map <- addTiles(leaflet())
  # Track drawn as one polyline
  track_map <- addPolylines(base_map, lat = track_points[,1], lng = track_points[,2])
  return(track_map)
}

A function to plot the mission track with labels

I’m still thinking of plotting the first value of the mission in green and the last value in red. I think this requires extracting the first value in a separate list or a dataframe and the same for the last value and then add them to the plotting function.

# Draw a mission track with one marker for the first record of every day.
#
# Arguments:
#   lat_lon_time_No_NA  Data frame with `lat`, `lon`, `time_formatted` and
#                       `date` columns (the output of
#                       combineLatLongWithTime()).
#   gliderTrack         Track object to draw (the output of
#                       getMissionTrack()).
#
# Returns:
#   The leaflet map from plotMissionTrack() with the markers added; each
#   marker is labelled with its `time_formatted` value.
plotMissionTrackWithLabels <- function(lat_lon_time_No_NA, gliderTrack){
  # Map that already shows the polyline
  track_map <- plotMissionTrack(gliderTrack)
  # First record of each day
  day_starts <- getMissionTrackLabels(lat_lon_time_No_NA)
  labelled_map <- addAwesomeMarkers(
    track_map,
    data = day_starts,
    lat = ~day_starts$lat,
    lng = ~day_starts$lon,
    label = day_starts$time_formatted
  )
  return(labelled_map)
}

4. Getting the minimum bounding box

Getting the minimum bounding box

you can get the minbbox using the following track attribute

5. Testing the functions

Testing the functions

This part is for testing the functions. These files are randomly chosen.

Providing the path to the netCDF test file

# Directory that holds the netCDF sample files (trailing slash included so
# that a file name can be appended directly)
netCDF_Data_Files_Path <- "/home/fadi/DataX1/University/WWU/WWU 5/netCDF sample files/"

# File 1: name of the file to read and its complete path
netCDF_File1_Name <- "wallis_mooset01_R.nc"
file_Path_File1 <- paste0(netCDF_Data_Files_Path, netCDF_File1_Name)

# File 2: name of the file to read and its complete path
netCDF_File2_Name <- "sg558_fram_jun2013_R.nc"
file_Path_File2 <- paste0(netCDF_Data_Files_Path, netCDF_File2_Name)

# File 3: name of the file to read and its complete path
netCDF_File3_Name <- "ideep02_0012_R.nc"
file_Path_File3 <- paste0(netCDF_Data_Files_Path, netCDF_File3_Name)

Applying the functions

File 1

I need to encapsulate those steps into one function. This should be done after I finish extracting metadata.

# Build the NA-free lat/lon/time data frame for file 1
lat_lon_time_No_NA_File1 <- combineLatLongWithTime(file_Path_File1)
# Console output recorded from a previous run:
## The time dimension has been successfully formatted!
## The number of NA this file has is:  464
# Derive the track object from that data frame
missionTrack_File1 <- getMissionTrack(lat_lon_time_No_NA_File1)
# First record of each day, used as marker labels
missionTrackLabels_File1 <- getMissionTrackLabels(lat_lon_time_No_NA_File1)
# Draw the bare track
plotMissionTrack(missionTrack_File1)
# Draw the track together with the per-day markers
plotMissionTrackWithLabels(lat_lon_time_No_NA_File1, missionTrack_File1)

Examples of the generalize function

Questions: What does the distance represent in case of having lat and long? How are the tracks generalized based on a time interval? Should I include other parameters when using the function generalize?

# Distance-based generalization of the file 1 track (distance = 100)
mission_Track_File1_generalized_distance <- generalize(missionTrack_File1, distance = 100)
# Time-interval-based generalization, currently disabled:
# mission_Track_File1_generalized_timeInterval <- generalize(missionTrack_File1, timeInterval = 60)